{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all' # default is ‘last_expr’\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'10.0.0'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import azure.batch\n",
    "azure.batch.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "from azure.batch import BatchServiceClient\n",
    "from azure.batch.batch_auth import SharedKeyCredentials\n",
    "from azure.batch.models import *\n",
    "from azure.common.credentials import ServicePrincipalCredentials"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Azure Batch\n",
    "\n",
    "Documentation\n",
    "- https://github.com/Azure-Samples/batch-python-quickstart/blob/master/src/python_quickstart_client.py\n",
    "- https://docs.microsoft.com/en-us/azure/batch/batch-docker-container-workloads#prefetch-images-for-container-configuration\n",
    "\n",
    "TODO\n",
    "\n",
    "- Turn `enable_auto_scale` on and set the appropriate `auto_scale_formula`. This way we can cap the maximum available nodes. https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling\n",
    "\n",
    "## Create a pool for each instance of the API\n",
    "\n",
    "List all Batch supported images with their \"capabilities\" (e.g. \"DockerCompatible\", \"NvidiaTeslaDriverInstalled\"):\n",
    "```\n",
    "az batch pool supported-images list\n",
    "```\n",
    "with the pool information provided in additional parameters.\n",
    "\n",
    "Listing all versions of a SKU of image:\n",
    "```\n",
    "az vm image list --all --publisher microsoft-dsvm\n",
    "```\n",
    "\n",
    "You may need to accept the terms of an image:\n",
    "```\n",
    "az vm image list --all --publisher <publisher>\n",
    "```\n",
    "to find the URN for the image you want to use, followed by:\n",
    "\n",
    "```\n",
    "az vm image terms accept --urn <corresponding-urn>\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.environ['BATCH_ACCOUNT_NAME'] = ''\n",
    "os.environ['BATCH_ACCOUNT_URL'] = ''\n",
    "\n",
    "os.environ['APP_CLIENT_ID'] = ''\n",
    "os.environ['APP_CLIENT_SECRET'] = ''\n",
    "os.environ['APP_TENANT_ID'] = ''\n",
    "\n",
    "os.environ['REGISTRY_SERVER'] = '.azurecr.io'  # e.g. registryname.azurecr.io\n",
    "os.environ['REGISTRY_USERNAME'] = ''\n",
    "os.environ['REGISTRY_PASSWORD'] = ''\n",
    "os.environ['REGISTRY_IMAGE_NAME'] = '.azurecr.io/tensorflow:1.14.0-gpu-py3' # login server/repository:tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "POOL_ID = 'internal_2'\n",
    "assert len(POOL_ID) <= 64, 'pool_id has more than 64 characters'\n",
    "\n",
    "POOL_NODE_COUNT = 1\n",
    "\n",
    "POOL_VM_SIZE = 'Standard_NC6s_v3'  # https://docs.microsoft.com/en-us/azure/virtual-machines/ncv3-series\n",
    "\n",
    "registry_server = os.environ['REGISTRY_SERVER']\n",
    "registry_username = os.environ['REGISTRY_USERNAME']\n",
    "registry_password = os.environ['REGISTRY_PASSWORD']\n",
    "docker_image = os.environ['REGISTRY_IMAGE_NAME']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_batch_exception(batch_exception):\n",
    "    \"\"\"\n",
    "    Prints the contents of the specified Batch exception.\n",
    "    \"\"\"\n",
    "    print('-------------------------------------------')\n",
    "    print('Exception encountered:')\n",
    "    if batch_exception.error and \\\n",
    "            batch_exception.error.message and \\\n",
    "            batch_exception.error.message.value:\n",
    "        print(batch_exception.error.message.value)\n",
    "        if batch_exception.error.values:\n",
    "            print()\n",
    "            for mesg in batch_exception.error.values:\n",
    "                print(f'{mesg.key}:\\t{mesg.value}')\n",
    "    print('-------------------------------------------')\n",
    "    \n",
    "def create_pool(batch_service_client, pool_id):\n",
    "    \"\"\"\n",
    "    Create a pool with pool_id and the Docker image specified in the env variables.\n",
    "    \"\"\"\n",
    "    image_ref = ImageReference(\n",
    "        publisher=\"microsoft-azure-batch\",\n",
    "        offer=\"ubuntu-server-container\",\n",
    "        sku=\"16-04-lts\",\n",
    "        version=\"latest\"  # URN: microsoft-azure-batch:ubuntu-server-container:16-04-lts:1.1.0\n",
    "        # The Azure Batch container image only accepts 'latest' version\n",
    "    )\n",
    "    \n",
    "    # Specify a container registry\n",
    "    container_registry = ContainerRegistry(\n",
    "        registry_server=registry_server,\n",
    "        user_name=registry_username,\n",
    "        password=registry_password\n",
    "    )\n",
    "    \n",
    "    container_conf = ContainerConfiguration(\n",
    "        container_image_names = [docker_image],\n",
    "        container_registries =[container_registry]\n",
    "    )\n",
    "            \n",
    "    vm_config = VirtualMachineConfiguration(\n",
    "        image_reference=image_ref,\n",
    "        container_configuration=container_conf,\n",
    "        node_agent_sku_id=\"batch.node.ubuntu 16.04\"\n",
    "    )\n",
    "    \n",
    "    new_pool = PoolAddParameter(\n",
    "        id=POOL_ID,\n",
    "        display_name=POOL_ID,\n",
    "        \n",
    "        vm_size=POOL_VM_SIZE,\n",
    "        target_dedicated_nodes=POOL_NODE_COUNT, # we only used dedicated nodes\n",
    "        \n",
    "        virtual_machine_configuration=vm_config\n",
    "    )\n",
    "    batch_service_client.pool.add(new_pool)\n",
    "\n",
    "def create_job():\n",
    "    pass\n",
    "\n",
    "def create_task():\n",
    "    \"\"\"\n",
    "    All Tasks should be idempotent as they may need to be retried due to a recovery operation.\n",
    "    \"\"\"\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "account_url = os.environ['BATCH_ACCOUNT_URL']\n",
    "\n",
    "app_client_id = os.environ['APP_CLIENT_ID']\n",
    "app_client_secret = os.environ['APP_CLIENT_SECRET']\n",
    "app_tenant_id = os.environ['APP_TENANT_ID']\n",
    "\n",
    "credentials = ServicePrincipalCredentials(\n",
    "    client_id=app_client_id,\n",
    "    secret=app_client_secret,\n",
    "    tenant=app_tenant_id,\n",
    "    resource=\"https://batch.core.windows.net/\"\n",
    ")\n",
    "\n",
    "# if using the Batch quota system, use https://docs.microsoft.com/en-us/python/api/azure-batch/azure.batch.batch_auth.sharedkeycredentials?view=azure-python\n",
    "# to authenticate instead of the service principal is also okay.\n",
    "\n",
    "batch_client = BatchServiceClient(credentials=credentials, batch_url=account_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    create_pool(batch_client, POOL_ID)\n",
    "except BatchErrorException as e:\n",
    "    print_batch_exception(e)\n",
    "    raise"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Submitting a job\n",
    "\n",
    "Job is what we have been referring to as Requests. Each shard corresponds to a Task.\n",
    "\n",
    "The Azure Batch service sets these environment variables on the compute nodes:\n",
    "\n",
    "- AZ_BATCH_JOB_ID\n",
    "\n",
    "- AZ_BATCH_TASK_ID\n",
    "- AZ_BATCH_TASK_DIR\n",
    "- AZ_BATCH_TASK_WORKING_DIR - currently running task has read/write access to this directory"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "job_id = 'test_docker0'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# job id is the request id in the old API context\n",
    "\n",
    "job = JobAddParameter(\n",
    "    id=job_id,\n",
    "    pool_info=PoolInformation(pool_id=POOL_ID),\n",
    ")\n",
    "\n",
    "batch_client.job.add(job)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Submit tasks to the job (the shards)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "command = \"\"\"/bin/sh -c \"python /app/score.py\" \"\"\"\n",
    "\n",
    "task = TaskAddParameter(\n",
    "    id='task_{}'.format(0),\n",
    "    command_line=command,\n",
    "    container_settings=TaskContainerSettings(\n",
    "        image_name=docker_image,\n",
    "        working_directory='taskWorkingDirectory'\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "azure.batch.models._models_py3.TaskAddParameter"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(task)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_client.task.add(job_id, task)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Monitoring a job\n",
    "\n",
    "Optimization: remember which tasks have already Completed so that we do not repeatedly query for their status.\n",
    "\n",
    "Documentation: https://docs.microsoft.com/en-us/azure/batch/batch-efficient-list-queries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "tasks = batch_client.task.list(job_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "tasks = [task for task in tasks]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "azure.batch.models._models_py3.CloudTask"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(tasks[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "incomplete_tasks = [task for task in tasks if\n",
    "                            task.state != TaskState.completed]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "incomplete_tasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:cameratraps-batch-api]",
   "language": "python",
   "name": "conda-env-cameratraps-batch-api-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
